Local Getis-Ord G Statistic

suppressPackageStartupMessages(require(spdep))
suppressPackageStartupMessages(require(sf))
suppressPackageStartupMessages(require(tidycensus))
suppressPackageStartupMessages(require(dplyr))
suppressPackageStartupMessages(require(tmap))
tmap_mode("view")
tract <- tidycensus::get_acs( # Taking data from census
  geography = 'tract',
  variables = c("Households_" = 'B22010_001', "SNAP_" = 'B22010_002'),
  state="MI",
  county=c("Oakland", "Macomb", "Wayne"),
  geometry = TRUE,
  output = 'wide') %>% 
  st_transform(4326) %>% 
  mutate(
    pct_snap = SNAP_E/Households_E
    # pct_snap = ifelse(is.nan(pct_snap), 0, pct_snap)
    # pct_snap = ifelse(is.na(pct_snap), 0, pct_snap)
  ) %>% 
  filter(
    as.vector(sf::st_area(.)) > 0,
    st_coordinates(st_centroid(.))[,2] > 33.667,
    !is.na(pct_snap),
    !is.nan(pct_snap) # Lets remove the NA values
  )

suppressMessages(tmap_mode('plot'))
tm_shape(tract) + 
  tm_polygons(
    col='pct_snap',
    palette = 'Blues',
    border.col = 'white',
    lwd=0.3,
    style = 'quantile',
    n=5,
    contrast=c(0,1),
    title = "Share of Households (0 to 1)") +
  tm_shape(tract %>% st_union()) +  # adding a nice border!
  tm_polygons(col = NA, alpha = 0, border.col = 'black') +
  tm_layout(main.title = "Households on Food Assistance (SNAP)")

Hot Spots and Cold Spots

tract_nb <- poly2nb(tract,queen=T) %>% 
  include.self() # This turns it into Gi*! Otherwise, it's just G
tract_W <- nb2listw(tract_nb, 
                    style="W",
                    zero.policy = TRUE)
tract_local_g <- spdep::localG(
  x = tract$pct_snap,
  listw = tract_W, 
  zero.policy = TRUE) %>% 
  as.vector()

# I made us another function!
make_g_bin <- function(v){
  case_when(
    v > -1.96 & v < 1.96 ~ "Insignificant",
    v >= 1.96 & v < 2.58 ~ "Hotspot (>1.96)",
    v >= 2.58 ~ "Hotspot (>2.58)",
    v < -1.96 & v > -2.58 ~ 'Coldspot (<-1.96)',
    v < -2.58 ~ 'Coldspot (<-2.58)',
    TRUE ~ 'Unknown'
  ) %>% as.factor() %>% 
    ordered(c(
      "Unknown",
      "Coldspot (<-2.58)",
      "Coldspot (<-1.96)",
      "Insignificant",
      "Hotspot (>1.96)",
      "Hotspot (>2.58)"))
}

g_palette <- c('grey','#0571B0','#92C5DE','cornsilk','#F4A582','#CA0020')
# make_g_bin(v = tract_local_g) 
# Apply our colors on the fly to the 'col' column
tract %>% mutate(G=make_g_bin(tract_local_g)) %>% 
  tm_shape() +
  tm_polygons(
    col = "G",
    palette = g_palette,
    border.col = 'grey80',
    lwd=0.4,
    title = "Local Getis-Ord G") +
  tm_layout(main.title = "Households on Food Assistance (SNAP)")

tract %>% mutate(G=make_g_bin(tract_local_g)) %>% 
  tm_shape() +
  tm_polygons(
    col = "G",
    palette = g_palette,
    border.col = 'grey80',
    alpha = 0.5,
    lwd=0.4,
    title = "Local Getis-Ord G") +
  tm_shape(st_union(tract)) + 
  tm_polygons(zindex = 200,lwd=2,alpha = 0,border.col = "black")+
  tm_tiles(leaflet::providers$Stamen.TerrainLabels) +
  tm_layout(main.title = "Households on Food Assistance (SNAP)")
suppressMessages(tmap_mode("view"))
tmap_options(basemaps = c("Stamen.TonerLite","Stamen.Terrain","Esri.WorldTopoMap","Esri.WorldImagery"))
  tract %>% mutate(G=make_g_bin(tract_local_g)) %>%
  tm_shape() +
  tm_polygons(
    col = "G",
    palette = g_palette,
    border.col = 'grey80',
    alpha = 0.5,
    lwd=0.4,
    title = "Local Getis-Ord G",
    popup.vars=c("Tract: " = "NAME", "Percent of households on food assistance (x 100)" = "pct_snap", "Classification: " = "G"))+
  tm_shape(st_union(tract)) +
  tm_polygons(zindex = 200,lwd=2,alpha = 0,border.col = "black")+
  tm_tiles(leaflet::providers$Stamen.TerrainLabels) +
  tm_layout(main.title = "Detroit Metro Households on Food Assistance")

Conclusion: Detroit and its surrounding areas were selected for local Getis-Ord G and local Moran’s I spatial analysis. Data of Oakland, Macomb, Wayne counties in the state of Michigan were collected from the census to be analyzed. In our map, we have classified red as hotspots i.e. areas where red is where people use the most food stamps, here the food stamp data is represented as SNAP in the data sheet, and on the other hand, blue is meant as coldspots meaning people in those areas using less stamps for food. We see from the map that there is a large clustering of red in the Detroit area and a small clustering in the Pontiac and Inkster areas. On the other hand, areas like Rochester, Milford, Wixom, Berkley, Clawson, Farmington, Plymouth, Livonia etc. have high and low clustering in blue. That is, most people in the Detroit region depend on food stamps provided by the government to meet their food needs.

Local Moran’s I

(Univariate)

tract_nb <- poly2nb(tract,queen=T)
tract_W <- nb2listw(tract_nb, 
                    style="W",
                    zero.policy = FALSE)

tract_local_i <- spdep::localmoran(
  x = tract$pct_snap, 
  listw = tract_W) %>% 
  as_tibble()

tract_local_i
## # A tibble: 1,164 × 5
##    Ii         E.Ii          Var.Ii      Z.Ii       `Pr(z != E(Ii))`
##    <localmrn> <localmrn>    <localmrn>  <localmrn> <localmrn>      
##  1  0.3722461 -2.656096e-04 0.051292905  1.6447932 0.1000124552    
##  2 -0.1895189 -1.044032e-04 0.017269309 -1.4413706 0.1494800072    
##  3  0.2504802 -2.495911e-04 0.036087732  1.3198547 0.1868835331    
##  4  1.9432558 -1.302028e-03 0.301675376  3.5403886 0.0003995383    
##  5  2.8093032 -7.619053e-03 0.971157312  2.8584465 0.0042572084    
##  6 -0.2993126 -5.344158e-04 0.123917467 -0.8487554 0.3960174002    
##  7  0.3604603 -3.315454e-04 0.042570536  1.7486472 0.0803520271    
##  8  0.3669423 -3.425703e-04 0.079448609  1.3030465 0.1925588791    
##  9  1.4714524 -1.059055e-03 0.245438980  2.9722608 0.0029561550    
## 10  0.1385828 -6.308377e-06 0.001463526  3.6226720 0.0002915754    
## # … with 1,154 more rows

Preparing the LISA value

# Transferring values from test into our sf object
tract$lisa_I <- tract_local_i$Ii
tract$lisa_EI <- tract_local_i$E.Ii
tract$lisa_Z <- tract_local_i$Z.Ii
tract$lisa_p <- p.adjust(tract_local_i$`Pr(z != E(Ii))`,
                         method = "fdr")

# A functino to bin I values
make_i_bin <- function(v){
  case_when(
    v <= 0.05 ~ "Significant",
    v > 0.05 ~ "Insignificant",
    is.na(v) ~ "Unknown",
    TRUE ~ 'Unknown'
  ) %>% as.factor() %>% 
    ordered(c(
      "Unknown",
      "Insignificant",
      "Significant"))
}

Creating a Lag vector

require(ggplot2)# load ggplot2 package 
## Loading required package: ggplot2
tract$lag_snap <- spdep::lag.listw(x = tract_W, var = tract$pct_snap)

# creating moran_plot function
moran_plot = function(var, w_var, xlab, ylab, main_title = NULL, normalize = T){
  if(missing(main_title)){
    main_title = "Moran's I Plot"
  }
  if(normalize){
    var = scale(var) %>% as.vector
    w_var = scale(w_var) %>% as.vector()
  }
  # lebel the values as "HH", "HL", "LL", "LH" 
  plot_dat = data.frame(var,w_var,stringsAsFactors = F) %>% 
    mutate(quad = case_when(
        var > 0 & w_var > 0 ~ "HH",
        var > 0 & w_var < 0 ~ "HL",
        var < 0 & w_var < 0 ~ "LL",
        var < 0 & w_var > 0 ~ "LH",
        TRUE ~ 'Unknown'))
  #plotting the data in quadrants
  plot_dat %>% ggplot() +
    geom_vline(xintercept = 0) +
    geom_hline(yintercept = 0) +
    geom_point(aes(x=var,y=w_var,color=quad)) +
    xlab(xlab) +
    ylab(ylab) +
    geom_smooth(
      mapping = aes(x=var,y=w_var),
      formula = 'y ~ x',
      method = 'lm',
      color="black",
      size=0.5,
      se=F) +
    scale_color_manual(
      name = "", 
      values = c('Unknown' = 'grey','HH'="firebrick",'HL'="brown1",'LL'="navy",'LH'="cornflowerblue")) +
    ggtitle(main_title)
}
moran_plot( #giving values to the function 
  var = tract$pct_snap, 
  w_var = tract$lag_snap, 
  xlab = "Households on SNAP (%)", 
  ylab = "Lagged Households on SNAP (%)")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.

The Moran’s I plot here suggests that there is a positive spatial autocorrelation between SNAP usage which means that if SNAP usage increases, the SNAP usage of its neighbors will increase too.

tract %>% tm_shape() +
  tm_polygons(
    col = "lag_snap",
    palette = '-RdBu',
    border.col = 'grey80',
    lwd=0.4, 
    style = 'cont',
    title = "Lagged Households on SNAP (%)") +
  tm_layout(main.title = "Households on Food Assistance (SNAP)")

Classifying quadrants

make_i_quads <- function(v, w_v, p, normalize = T){
  if(normalize){
    v = scale(v) %>% as.vector
    w_v = scale(w_v) %>% as.vector()
  }
  case_when(
    p > 0.05 ~ "Insignificant", ### Make sure that 'p' has been adjusted!
    v < 0 & w_v < 0 ~ "Low-Low",
    v < 0 & w_v > 0 ~ "Low-High",
    v > 0 & w_v < 0 ~ "High-Low",
    v > 0 & w_v > 0 ~ "High-High",
    TRUE ~ 'Unknown'
  ) %>% as.factor() %>% 
    ordered(c(
      "Insignificant",
      "Low-Low",
      "Low-High",
      "High-Low",
      "High-High"))
}

i_palette <- c('cornsilk','#0571B0','#92C5DE','#F4A582','#CA0020') 
tract %>% mutate(quad = make_i_quads(v = pct_snap, w_v = lag_snap,p = lisa_p)) %>% 
  tm_shape() +
  tm_polygons(
    col = "quad",
    palette = i_palette,
    border.col = 'grey70',
    lwd=0.4,
    title = "Local Moran's I") + 
  tm_layout(main.title = "Local Moran's I: SNAP Benefit Usage in LA County")

Conclusion: From the local Moran-Eye map, we see that there is a greater clustering of food in the Detroit area, and some blue areas to the north and northwest of the red zone. Historically, Detroit was a very successful auto industrial city. But in the mid-1900s, competition in the auto industry increased. In order to compete with Japanese and German auto companies, these companies began to relocate to other cities to build new, more efficient plants. Chrysler, one of Detroit’s dominant auto industries, went bankrupt in 1979. All these economic chaos has eventually made Detro a dying city and currently it has the highest unemployment rate in the entire USA (over 16%). So people here are more dependent on food stamps because they don’t have enough money to buy food. Besides, the wealthier white population migrated north and northwest of Detroit due to the economic crisis, declining property values, lack of jobs, etc., which indicates the reason for the blue clustering of those places.